# Title: 
# - False Discovery Rate (FDR) Control for Gridded Data

# Objective:
# - Adjusting p-values Using FDR-BH control procedure

# Authors:
# - Oliver Gutiérrez-Hernández: http://orcid.org/0000-0003-2580-5465. E-mail: olivergh@uma.es
# - Luis V. García: http://orcid.org/0000-0002-5514-2941. E-mail: lv.garcia@csic.es

# Source:
# - Gutiérrez-Hernández, O., & García, L.V. (2025). 
#   False Discovery Rate Estimation and Control in Remote Sensing: 
#   Reliable Statistical Significance in Spatially Dependent Gridded Data. 
#   Remote Sensing Letters, 16(5), 537–554. https://doi.org/10.1080/2150704X.2025.2478664 

# Reference for FDR-BH Control Procedure: 
# - Benjamini, Yoah & Hochberg, Yosef. 1995. “Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing.” 
#   Journal of the Royal Statistical Society: Series B (Methodological) 57: 289–300. https://doi.org/10.1111/j.2517-6161.1995.tb02031.x

# Practical Applications:
# - Any scenario requiring rigorous multiple hypothesis testing on spatial raster data based on p-values to adjust and reduce false discoveries.
# - Suitable for datasets with positive dependency structures, as the FDR-BH control procedure is valid under such conditions (Benjamini & Yekutieli, 2001).
# - Note: In cases of strong positive dependencies, it may be beneficial to explore or compare alternative methods to the FDR-BH procedure, 
#   such as adaptive, spatially aware, or permutation-based approaches, to improve sensitivity and increase statistical power while maintaining robust control of false discoveries.  

#     - Environmental sciences:
#     - Analyse spatial and temporal trends in climate variables, such as temperature or precipitation.
#     - Identify regions with statistically significant changes or anomalies using adjusted p-values and LBE-based FDR estimation.

#     Remote sensing:
#     - Detect spatiotemporal trends in vegetation indices (e.g., NDVI), land cover dynamics, or urban expansion.
#     - Highlight regions with significant trends or relationships in large-scale raster datasets, minimising false positives.

#     Public health:
#     - Map disease incidence, pollution exposure, or other health-related metrics across regions.
#     - Identify spatial hotspots with significant changes or relationships affecting public health.

#     Agriculture:
#     - Assess changes in crop health, soil moisture, or productivity from spatially gridded datasets.
#     - Detect significant trends or anomalies impacting agricultural productivity, ensuring reduced false discoveries with LBE.

# Input:
# - Raster file: Image in TIFF format containing p-values for analysis.
# - Provide the full path to a raster file containing p-values in TIFF format (.tif). Example: "C:/data/p-values.tif".
# - Ensure the raster file is properly formatted:
#   - p-values must range between 0 and 1.
#   - Invalid or missing values should be clearly identified (e.g., -9999).

# Outputs:
# 1. Summary Statistics:
#    - Total number of valid pixels (non-NA).
#    - Number and percentage of significant pixels (raw p-values and adjusted q-values).
#
# 2. Graphical Outputs:
#    - FDR plot comparing raw p-values with BH thresholds.
#    - Visualisation of raw p-values, binary maps, and adjusted q-values.
#
# 3. File Outputs (maps):
#    - Continuous raster of adjusted p-values (FDR-BH): <base_name>_adjusted_FDR-BH.tif
#    - Binary raster of significant pixels (raw p-values): <base_name>_binary_original.tif
#    - Binary raster of significant pixels (FDR-adjusted): <base_name>_binary_FDR-BH.tif


# --- Prerequisites --- # !!! ATTENTION: ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲

# Install necessary packages before running the script, including dependencies, only if not already installed.

# Function to check and install a package if not already installed
install_if_missing <- function(package) {
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package, dependencies = TRUE)
  } else {
    cat(paste("Package", package, "is already installed.\n"))
  }
}

# Install 'terra' for handling raster data
install_if_missing("terra")


# --- Load necessary libraries ---
library(terra)   # For handling raster data


# --- Set paths and load data ---
raster_path <- "C:/data/p-values.tif"  # !!! ATTENTION: REPLACE WITH YOUR FILE PATH ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲ ▲
raster_pvalues <- rast(raster_path)    # Load the raster data using terra
raster_base_name <- tools::file_path_sans_ext(basename(raster_path))  # Extract the base name for file outputs

# Raster information
raster_pvalues  # This will display basic information about the raster (e.g., format, dimensions)
cat("Total number of pixels:", ncell(raster_pvalues), "\n")  # Display the total number of pixels in the raster

# Count valid and invalid pixels
p_values <- values(raster_pvalues)  # Extract the values from the raster
valid_indices <- !is.na(p_values)  # Identify valid pixels (not NA)
num_valid_pixels <- sum(valid_indices)  # Count valid pixels
num_invalid_pixels <- sum(is.na(p_values))  # Count invalid pixels (NA)
cat("Number of total pixels:", ncell(raster_pvalues), "\n")  # Total pixels
cat("Number of invalid pixels (NA):", num_invalid_pixels, "\n")  # Invalid pixels (NA)
cat("Number of valid pixels (containing p-values derived from hypothesis testing):", num_valid_pixels, "\n")  # Valid pixels

# --- Process p-values ---
p_values <- values(raster_pvalues)      # Extract raster values
valid_indices <- !is.na(p_values)       # Identify valid indices
adjusted_p_values <- rep(NA, length(p_values))    # Initialise adjusted p-values (to be filled with FDR-BH adjusted values)
adjusted_p_values[valid_indices] <- p.adjust(p_values[valid_indices], method = "BH")  # Apply the FDR-BH procedure to valid p-values

# --- Generate binary maps based on p-value thresholds ---
binary_original <- ifelse(p_values <= 0.05, 1, 0)  # Binary map of original p-values (raw p-values < 0.05)
binary_adjusted <- ifelse(adjusted_p_values <= 0.05, 1, 0)  # Binary map of adjusted p-values (FDR-BH)
binary_original[!valid_indices] <- NA  # Set NA for invalid pixels in the original map
binary_adjusted[!valid_indices] <- NA  # Set NA for invalid pixels in the adjusted map

# --- Convert to rasters ---
raster_adjusted <- setValues(raster_pvalues, adjusted_p_values)  # Adjusted p-values (FDR-BH)
raster_binary_original <- setValues(raster_pvalues, binary_original)  # Binary map of original p-values
raster_binary_adjusted <- setValues(raster_pvalues, binary_adjusted)  # Binary map of adjusted p-values

# --- Summary Statistics ---
num_valid_pixels <- sum(valid_indices)  # Total number of valid pixels
num_rejected_original <- sum(binary_original == 1, na.rm = TRUE)  # Significant pixels (raw p-values < 0.05)
num_rejected_adjusted <- sum(binary_adjusted == 1, na.rm = TRUE)  # Significant pixels (FDR-adjusted p-values < 0.05)
perc_original <- (num_rejected_original / num_valid_pixels) * 100  # Percentage of significant raw p-values
perc_adjusted <- (num_rejected_adjusted / num_valid_pixels) * 100  # Percentage of significant adjusted p-values

# Print summary statistics
cat("\n--- Summary Statistics ---\n")
cat("Valid pixels:", num_valid_pixels, "\n")  # Total valid pixels
cat("Significant pixels (raw p-values < 0.05):", num_rejected_original, "(", round(perc_original, 2), "%)\n")  # Significant raw p-values
cat("Significant pixels (FDR-adjusted q-values < 0.05):", num_rejected_adjusted, "(", round(perc_adjusted, 2), "%)\n")  # Significant FDR-adjusted p-values

# --- Graphical Outputs ---
# FDR plot (raw p-values vs. BH thresholds)
ordered_p_values <- sort(p_values[valid_indices])  # Sort valid p-values in ascending order
m <- length(ordered_p_values)
thresholds <- (1:m) * 0.05 / m  # Benjamini-Hochberg threshold for each p-value based on rank

par(mfrow = c(1, 1))  # Reset to single plot
plot(1:m, ordered_p_values, pch = 19, col = ifelse(ordered_p_values <= thresholds, 'orange', 'blue'),
     xlab = expression("Rank "*italic(k)), ylab = expression(italic(p)*" values"),
     main = expression("p-value Adjustment (Method: FDR-BH)"),
     ylim = c(0, max(ordered_p_values, na.rm = TRUE)))
lines(1:m, thresholds, col = 'skyblue', lwd = 2)
grid()

# Visualisation of maps
par(mfrow = c(2, 2))  # 2 rows and 2 columns for map visualisation
plot(raster_pvalues, main = expression("Raw "*italic(p)*"-values"), col = hcl.colors(100, "Blues"))
plot(raster_binary_original, main = "Significant trends (original)", col = c("white", "orange"), legend = FALSE)
plot(raster_adjusted, main = expression("Adjusted "*italic(p)*"-values (FDR-BH)"), col = hcl.colors(100, "Blues"))
plot(raster_binary_adjusted, main = "Significant trends (FDR-BH)", col = c("white", "orange"), legend = FALSE)

# --- File Outputs (maps) ---
raster_base_name <- tools::file_path_sans_ext(basename(raster_path))
writeRaster(setValues(raster_pvalues, adjusted_p_values), paste0(raster_base_name, "_adjusted_FDR-BH.tif"), overwrite = TRUE)
writeRaster(raster_binary_original, paste0(raster_base_name, "_binary_original.tif"), overwrite = TRUE)
writeRaster(raster_binary_adjusted, paste0(raster_base_name, "_binary_FDR-BH.tif"), overwrite = TRUE)

# --- Display file outputs ---
cat("\n--- File Outputs ---\n")
cat("Adjusted p-values raster (FDR-BH): <base_name>_adjusted_FDR-BH.tif\n")
cat("Binary raster (raw p-values): <base_name>_binary_original.tif\n")
cat("Binary raster (adjusted p-values, FDR-BH): <base_name>_binary_FDR-BH.tif\n")
